import { renderAsync } from 'docx-preview'

/**
 * Result of parsing a Word document.
 */
export interface WordParseResult {
  /** HTML markup read back from the container the document was rendered into. */
  html: string
  /** Tables found in the rendered HTML. */
  tables: TableData[]
  /** Images found in the rendered HTML. */
  images: ImageData[]
  /** Descriptive information about the document. */
  metadata: DocumentMetadata
}

/**
 * A single cell of an extracted table.
 */
export interface TableCell {
  /** Cell content; inner markup is kept, with a few named HTML entities decoded. */
  content: string
  /** Parsed value of the cell's `rowspan` attribute, when present. */
  rowspan?: number
  /** Parsed value of the cell's `colspan` attribute, when present. */
  colspan?: number
  /** True when the cell comes from a `<th>` element. */
  isHeader?: boolean
  /** Width of the cell as a CSS-style string (e.g. taken from its style or `width` attribute). */
  width?: string
}

/**
 * An extracted table.
 */
export interface TableData {
  /** Table rows in document order; each row is the list of its cells. */
  rows: TableCell[][]
  /** Optional header labels. NOTE(review): not populated by the code visible in this file. */
  headers?: string[]
  /** Optional per-column widths, read from the table's `<colgroup>` when one exists. */
  columnWidths?: string[]
}

/**
 * An image extracted from the document.
 */
export interface ImageData {
  /** Name given to the image by the extractor. */
  name: string
  /** Image payload, base64-encoded (the part of a `data:` URL after the comma). */
  data: string // base64
  /** Type label of the image, e.g. a MIME type such as `image/png`. */
  type: string
}

/**
 * Descriptive information about a parsed document.
 */
export interface DocumentMetadata {
  /** Document title; the parser in this file derives it from the file name without its extension. */
  title?: string
  /** Document author. NOTE(review): not populated by the code visible in this file. */
  author?: string
  /** Creation timestamp. */
  created?: Date
  /** Last-modification timestamp. */
  modified?: Date
  /** Number of whitespace-separated tokens in the document's text. */
  wordCount: number
  /** Number of pages. NOTE(review): not populated by the code visible in this file. */
  pageCount?: number
}


/**
 * Renders Word (.docx) documents with docx-preview and extracts structured
 * data (HTML, tables, images, metadata) from the rendered output.
 */
class WordParserService {
  /** Named HTML entities that are decoded in table cell content. */
  private static readonly ENTITY_REPLACEMENTS: Record<string, string> = {
    '&nbsp;': ' ',
    '&amp;': '&',
    '&lt;': '<',
    '&gt;': '>',
    '&quot;': '"'
  }

  /** Matches exactly the entities listed in ENTITY_REPLACEMENTS. */
  private static readonly ENTITY_PATTERN = /&(?:nbsp|amp|lt|gt|quot);/g

  /**
   * Process a Word document: render it and, unless only a preview is
   * requested, extract its HTML, tables, images and metadata.
   *
   * @param file Word document file
   * @param container Optional element to render the preview into. Its content
   *   is replaced. When omitted, a temporary off-screen element is used and
   *   removed again before this method settles.
   * @param options Processing options; `previewOnly` skips data extraction.
   * @returns The parse result, or nothing when `previewOnly` is true.
   * @throws Error with a generic message when reading, rendering or extraction
   *   fails; the underlying error is logged to the console.
   */
  async processWordDocument(
    file: File,
    container?: HTMLElement | null,
    options: {
      previewOnly?: boolean
    } = {}
  ): Promise<WordParseResult | void> {
    const { previewOnly = false } = options
    // Tracked outside the try block so `finally` can always clean it up.
    let temporaryContainer: HTMLElement | null = null

    try {
      const arrayBuffer = await file.arrayBuffer()
      let targetContainer: HTMLElement

      if (container) {
        // Render into the caller's container, replacing whatever it held.
        targetContainer = container
        targetContainer.innerHTML = ''
      } else {
        // No container supplied: render off-screen purely for data extraction.
        temporaryContainer = document.createElement('div')
        temporaryContainer.style.position = 'absolute'
        temporaryContainer.style.left = '-9999px'
        temporaryContainer.style.top = '-9999px'
        document.body.appendChild(temporaryContainer)
        targetContainer = temporaryContainer
      }

      await renderAsync(arrayBuffer, targetContainer, undefined, {
        className: 'docx-preview',
        inWrapper: true,
        ignoreWidth: false,
        ignoreHeight: false,
        ignoreFonts: false,
        breakPages: true,
        ignoreLastRenderedPageBreak: true,
        experimental: true,
        trimXmlDeclaration: true,
        // Image extraction only recognises `data:` URLs, so embed resources as
        // base64 whenever data will be extracted. Pure previews keep using
        // object URLs.
        useBase64URL: !previewOnly
      })

      if (previewOnly) {
        return
      }

      const html = targetContainer.innerHTML
      const text = this.extractTextFromHtml(html)
      const tables = this.extractTablesFromHtml(html)
      const images = this.extractImagesFromHtml(html)
      const metadata = this.generateMetadata(file, text)

      return {
        html,
        tables,
        images,
        metadata
      }
    } catch (error) {
      console.error('Word文档处理失败:', error)
      if (container) {
        container.innerHTML = '<p style="color: red; padding: 20px;">文档处理失败，请检查文件格式</p>'
      }
      throw new Error('Word文档处理失败，请检查文件格式')
    } finally {
      // Runs on success, on preview-only return and on failure, so the
      // off-screen element never stays attached to the page.
      temporaryContainer?.remove()
    }
  }

  /**
   * Parse a Word document into HTML, tables, images and metadata
   * (backward-compatible wrapper around processWordDocument).
   *
   * @param file Word document file
   * @returns The parse result
   * @throws Error when processing fails
   */
  async parseWordDocument(file: File): Promise<WordParseResult> {
    const result = await this.processWordDocument(file, null)
    if (!result) {
      // Not expected without `previewOnly`; guards the type instead of casting.
      throw new Error('Word文档处理失败，请检查文件格式')
    }
    return result
  }

  /**
   * Parse an HTML string into a detached element so it can be queried.
   */
  private createDetachedRoot(html: string): HTMLElement {
    const root = document.createElement('div')
    root.innerHTML = html
    return root
  }

  /**
   * Decode the entities listed in ENTITY_REPLACEMENTS in a single pass, so the
   * output of one replacement is never decoded a second time
   * (e.g. `&amp;lt;` becomes `&lt;`, not `<`).
   */
  private decodeBasicEntities(text: string): string {
    return text.replace(
      WordParserService.ENTITY_PATTERN,
      entity => WordParserService.ENTITY_REPLACEMENTS[entity] ?? entity
    )
  }

  /**
   * Parse a rowspan/colspan attribute value as a base-10 integer.
   * Returns undefined for missing, empty or non-numeric values.
   */
  private parseSpan(raw: string | null): number | undefined {
    if (!raw) {
      return undefined
    }
    const parsed = Number.parseInt(raw, 10)
    return Number.isNaN(parsed) ? undefined : parsed
  }

  /**
   * Read the `width` declaration from an inline style string.
   * Only the `width` property itself is matched, not `min-width`,
   * `border-width` and similar.
   */
  private readWidthFromStyle(style: string | null): string | undefined {
    if (!style) {
      return undefined
    }
    const match = style.match(/(?:^|;)\s*width\s*:\s*([^;]+)/i)
    const value = match?.[1]?.trim()
    return value ? value : undefined
  }

  /**
   * Extract the plain text of an HTML string.
   * `<style>` and `<script>` elements are dropped first so their source text
   * is not counted as document text.
   */
  private extractTextFromHtml(html: string): string {
    const root = this.createDetachedRoot(html)
    root.querySelectorAll('style, script').forEach(node => node.remove())
    return root.textContent || ''
  }

  /**
   * Extract table data from an HTML string using the DOM.
   * Every `<table>` (nested ones included) yields its own entry; a table's
   * rows and cells are limited to the ones it owns directly.
   */
  private extractTablesFromHtml(html: string): TableData[] {
    const root = this.createDetachedRoot(html)
    const tables: TableData[] = []

    for (const table of Array.from(root.querySelectorAll('table'))) {
      const columnWidths = this.extractColumnWidths(table)
      const rows: TableCell[][] = []
      // rowsLeft[c] = number of upcoming rows in which column c is still
      // covered by a rowspan cell that started in an earlier row.
      const rowsLeft: number[] = []

      for (const row of Array.from(table.rows)) {
        const cells: TableCell[] = []
        let column = 0

        for (const cell of Array.from(row.cells)) {
          // Skip columns occupied by cells spanning down from above.
          while ((rowsLeft[column] ?? 0) > 0) {
            rowsLeft[column] = (rowsLeft[column] ?? 0) - 1
            column++
          }

          const rowspan = this.parseSpan(cell.getAttribute('rowspan'))
          const colspan = this.parseSpan(cell.getAttribute('colspan'))

          // Keep the cell's inner markup; only decode a few named entities.
          const content = this.decodeBasicEntities(cell.innerHTML).trim()

          cells.push({
            content,
            rowspan,
            colspan,
            isHeader: cell.tagName.toLowerCase() === 'th',
            width: this.getCellWidth(cell, columnWidths, column)
          })

          // Record the grid area this cell covers. HTML caps colspan at 1000.
          const columnsCovered = Math.min(Math.max(colspan ?? 1, 1), 1000)
          const extraRows = Math.max((rowspan ?? 1) - 1, 0)
          for (let c = column; c < column + columnsCovered; c++) {
            rowsLeft[c] = extraRows
          }
          column += columnsCovered
        }

        // Columns right of the last cell were not visited above; age them too.
        for (let c = column; c < rowsLeft.length; c++) {
          const remaining = rowsLeft[c] ?? 0
          if (remaining > 0) {
            rowsLeft[c] = remaining - 1
          }
        }

        if (cells.length > 0) {
          rows.push(cells)
        }
      }

      if (rows.length > 0) {
        tables.push({
          rows,
          columnWidths: columnWidths.length > 0 ? columnWidths : undefined
        })
      }
    }

    return tables
  }

  /**
   * Read the column widths from the table's own `<colgroup>`.
   *
   * Each `<col>` contributes one entry: its `width` attribute, or else the
   * `width` declared in its inline style. Bare numbers get a `px` unit,
   * values with a px/%/pt/em/rem unit are kept, any other value is passed
   * through unchanged, and a column without width information yields 'auto'.
   *
   * @returns One entry per `<col>`, or an empty array without a `<colgroup>`.
   */
  private extractColumnWidths(table: HTMLElement): string[] {
    // `:scope >` keeps a nested table's colgroup from being picked up.
    const colgroup = table.querySelector(':scope > colgroup')
    if (!colgroup) {
      return []
    }

    return Array.from(colgroup.querySelectorAll('col'), col => {
      const raw = (col.getAttribute('width') || this.readWidthFromStyle(col.getAttribute('style')) || '').trim()
      if (!raw) {
        return 'auto'
      }
      const numeric = raw.match(/^(\d+(?:\.\d+)?)(px|%|pt|em|rem)?$/i)
      if (!numeric) {
        return raw
      }
      return `${numeric[1]}${numeric[2] ?? 'px'}`
    })
  }

  /**
   * Determine the width of a cell.
   *
   * Order of precedence: the `width` in the cell's inline style, the cell's
   * `width` attribute, then the colgroup width at the given position.
   *
   * @param cellElement The cell element
   * @param columnWidths Column widths from the table's colgroup
   * @param cellIndex Position used to look up `columnWidths`; the caller
   *   passes the cell's starting grid column. For a cell spanning several
   *   columns only the first column's width is returned.
   */
  private getCellWidth(cellElement: HTMLElement, columnWidths: string[], cellIndex: number): string | undefined {
    const inlineWidth = this.readWidthFromStyle(cellElement.getAttribute('style'))
    if (inlineWidth) {
      return inlineWidth
    }

    const widthAttribute = cellElement.getAttribute('width')
    if (widthAttribute) {
      return widthAttribute
    }

    return cellIndex < columnWidths.length ? columnWidths[cellIndex] : undefined
  }

  /**
   * Extract images embedded as `data:` URLs from an HTML string.
   * Images with any other kind of source are skipped. Names are
   * `image_<n>`, where n is the 1-based position among all `<img>` elements.
   */
  private extractImagesFromHtml(html: string): ImageData[] {
    const images: ImageData[] = []
    const root = this.createDetachedRoot(html)

    root.querySelectorAll('img').forEach((img, index) => {
      const src = img.src
      if (!src.startsWith('data:')) {
        return
      }

      const commaAt = src.indexOf(',')
      if (commaAt === -1) {
        // Malformed data URL without a payload separator.
        return
      }

      const header = src.slice(0, commaAt)
      const mimeType = header.match(/data:([^;]+)/)?.[1] || 'image/png'

      images.push({
        name: `image_${index + 1}`,
        data: src.slice(commaAt + 1),
        type: mimeType
      })
    })

    return images
  }

  /**
   * Extract table data from an HTML string using regular expressions
   * (original method, kept for compatibility).
   *
   * Cell content keeps its inner markup; a few named entities are decoded.
   * Tag matching is case-insensitive. Because matching is regex-based, a
   * table nested inside another table is not handled correctly.
   */
  public extractTables(html: string): TableData[] {
    const tables: TableData[] = []
    const tableRegex = /<table[^>]*>([\s\S]*?)<\/table>/gi
    let match: RegExpExecArray | null

    while ((match = tableRegex.exec(html)) !== null) {
      const tableHtml = match[1] ?? ''
      const rows: TableCell[][] = []

      const rowRegex = /<tr[^>]*>([\s\S]*?)<\/tr>/gi
      let rowMatch: RegExpExecArray | null

      while ((rowMatch = rowRegex.exec(tableHtml)) !== null) {
        const rowHtml = rowMatch[1] ?? ''
        const cells: TableCell[] = []

        // Matches both <td> and <th> cells.
        const cellRegex = /<(t[dh])([^>]*)>([\s\S]*?)<\/\1>/gi
        let cellMatch: RegExpExecArray | null

        while ((cellMatch = cellRegex.exec(rowHtml)) !== null) {
          // The regex is case-insensitive, so normalise before comparing.
          const tagName = (cellMatch[1] ?? '').toLowerCase()
          const attributes = cellMatch[2] ?? ''
          const cellContent = cellMatch[3] ?? ''

          cells.push({
            content: this.decodeBasicEntities(cellContent).trim(),
            rowspan: this.parseSpan(this.extractAttribute(attributes, 'rowspan')),
            colspan: this.parseSpan(this.extractAttribute(attributes, 'colspan')),
            isHeader: tagName === 'th'
          })
        }

        if (cells.length > 0) {
          rows.push(cells)
        }
      }

      if (rows.length > 0) {
        tables.push({
          rows
        })
      }
    }

    return tables
  }

  /**
   * Extract the value of a named attribute from a tag's attribute string.
   *
   * The name must start the string or follow whitespace, so `rowspan` does not
   * match inside `data-rowspan`. Double-quoted, single-quoted and unquoted
   * values are supported.
   *
   * @returns The attribute value, or null when it is absent or empty.
   */
  private extractAttribute(attributes: string, attrName: string): string | null {
    const escapedName = attrName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    const regex = new RegExp(
      `(?:^|\\s)${escapedName}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`,
      'i'
    )
    const match = attributes.match(regex)
    if (!match) {
      return null
    }
    const value = match[1] ?? match[2] ?? match[3]
    return value ? value : null
  }

  /**
   * Extract image data from a list of message objects
   * (original method, kept for compatibility; not called within this class).
   *
   * Only objects whose `type` is 'image' are used. Since `type` is copied from
   * the message, it is always 'image' in the result and the 'image/png'
   * fallback is never reached.
   */
  private extractImages(messages: unknown[]): ImageData[] {
    const images: ImageData[] = []

    messages.forEach(message => {
      if (typeof message === 'object' && message !== null && 'type' in message) {
        const msg = message as { type: string; name?: string; data?: string }
        if (msg.type === 'image') {
          images.push({
            name: msg.name || 'image',
            data: msg.data || '',
            type: msg.type || 'image/png'
          })
        }
      }
    })

    return images
  }

  /**
   * Build document metadata.
   *
   * - `title` is the file name without its last extension.
   * - `wordCount` counts whitespace-separated tokens, so text written without
   *   spaces counts as a single token.
   * - `created` is the time of this call; the File API exposes no creation time.
   * - `modified` is the file's last-modified timestamp.
   */
  private generateMetadata(file: File, text: string): DocumentMetadata {
    const wordCount = text.split(/\s+/).filter(word => word.length > 0).length

    return {
      title: file.name.replace(/\.[^/.]+$/, ''),
      wordCount,
      created: new Date(),
      modified: new Date(file.lastModified)
    }
  }

  /**
   * Preview a Word document with docx-preview, keeping its original styling
   * (backward-compatible wrapper around processWordDocument).
   *
   * @param file Word document file
   * @param container Element to render the preview into
   * @throws Error when processing fails
   */
  async previewWordDocument(file: File, container: HTMLElement): Promise<void> {
    await this.processWordDocument(file, container, { previewOnly: true })
  }

}

// Shared singleton instance, exported both by name and as the module default.
export const wordParserService = new WordParserService()
export default wordParserService
